---
output:

  bookdown::pdf_document2:
    toc: yes
    number_sections: yes
    pandoc_args: !expr rmdfiltr::add_wordcount_filter(rmdfiltr::add_citeproc_filter(args = NULL))
    latex_engine: xelatex
    extra_dependencies: ["float"]
always_allow_html: true

editor_options: 
  chunk_output_type: console

citeproc: no
fontfamily: mathpazo
fontsize: 11pt
geometry: margin=1in
indent: yes
link-citations: yes
linkcolor: blue
lang: 'en-US'



title: "Online Appendix for \"Strangers in the Homeland? The Academic Performance of U.S.-Born Children of Return Migrants in Mexico\""
author: |
  | Nathan I. Hoffmann
  | Department of Sociology, UCLA

date: "Population Research and Policy Review"

  
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F, warning = F, message = F, cache = T)
options("yaml.eval.expr" = TRUE)


library(patchwork)
library(srvyr)
library(patchwork)
library(cem)
library(sandwich)
library(lmtest)
library(estimatr)
library(here)
library(knitr)
library(kableExtra)
library(broom)
library(sensemakr)
library(MatchIt)
library(stargazer)
library(haven)
library(tidyverse)

knit_hooks$set(inline = function(x) {
  prettyNum(x, big.mark=",")
})


options("yaml.eval.expr" = TRUE, scipen = 3, digits = 2)

uclablue = '#2774AE'
gray = '#808080'
black = '#000000'
ucla_palette = c(black, uclablue, gray)


theme_set(theme_classic(base_family = 'Palatino') + 
      theme(legend.title=element_blank(), 
         panel.grid.major.y = element_line('grey80'),
         legend.background = element_rect(fill = alpha("white", 0.5))
         ))
ggplot <- function(...) ggplot2::ggplot(...) + 
  scale_color_brewer(palette="Dark2") +
  scale_fill_brewer(palette="Dark2")

kable <- function(...) knitr::kable(..., format.args = list(big.mark = ","))
```

```{r load, include = F}
pisa_full <- readRDS(here('pisa_return.rds')) %>%
   filter(across(c(female,  mom_ed, dad_ed, early_ed, cultural_pos, home_ed, age),
                ~ !is.na(.x))) %>%
  mutate(school_id = paste0(year, school_id),
         village = school_location == 'Village',

         non_urban = factor(case_when(school_location %in% c('Village', 'Small Town', 'Town') ~ 'nonurban',
                               is.na(school_location) ~ 'missing',
                               T ~ 'urban'),
                            levels = c('urban', 'nonurban', 'missing')),
         family_structure = factor(ifelse(is.na(family_structure), 'missing', family_structure), 
                                   levels = c('two-parent family', 'single-parent family', 'does not live with parents', 'missing')),
         age_arrival = as.numeric(age_arrival),
         early_ed = case_when(early_ed %in% c('No', '0-1') ~ '1 year or less',
                              early_ed == '1+' ~ 'More than 1 year')
         )



pisa_mex_all <- pisa_full %>%
  filter(country == 'Mexico',
         birth_country %in% c('Mexico', 'United States of America'),
         mom_country == 'Mexico',
         dad_country == 'Mexico') %>%
  mutate(birth_country = ifelse(birth_country == 'United States of America', 'USA', as.character(birth_country)),
         treat = as.numeric(birth_country == 'USA'),
         region_mex = ifelse(is.na(region_mex) | region_mex == 'Undisclosed Stratum - Mexico', 'missing', region_mex),
         migrant_region = region_mex %in% c('Durango', 'Zacatecas', 'San Luis Potosi', 'Nayarit', 
                                            'Aguascalientes', 'Jalisco', 'Guanajuato', 'Colima', 
                                            'Michoacan'),
  ) %>%
  filter(age_arrival > 0 | is.na(age_arrival)) 

pisa_us <-  pisa_full %>%
  filter((country %in% c('United States', 'United States of America') & 
            birth_country == 'United States of America' &
            lang == 'Spanish' &
            mom_country == 'Another country (USA)' & 
            dad_country == 'Another country (USA)')) %>%
  mutate(country = 'USA',
         birth_country = 'USA')

pisa_mex_us_all <- bind_rows(
    filter(pisa_mex_all, birth_country == 'USA'),
    pisa_us
) %>%
  mutate(treat = as.numeric(country == 'Mexico')) %>%
  filter(age_arrival > 0 | is.na(age_arrival)) 


pisa_mex <- pisa_mex_all %>%
  filter(year %in% c(2009, 2012)) %>%
  filter(age_arrival > 0 | is.na(age_arrival)) 

pisa_mex_us <- pisa_mex_us_all %>%
  filter(year %in% c(2009, 2012)) %>%
  filter(age_arrival > 0 | is.na(age_arrival)) 


pisa_mex_nonmigrant <- pisa_mex_all %>%
  filter(year %in% c(2009, 2012)) %>%
  filter(age_arrival == 0 | is.na(age_arrival)) 

pisa_mex_us_nonmigrant <- pisa_mex_us_all %>%
  filter(year %in% c(2009, 2012)) %>%
  filter(age_arrival == 0 | is.na(age_arrival)) 


```

```{r functions}
rubin <- function(dataset, sample_label, covariates = NULL, pv_num = 5, 
                  treat = 'treat', weights = 'weight', cluster_var = 'school_id'){
  dataset_survey = as_survey_design(dataset, weights = weights, ids = cluster_var)
  
  dim_list <- list()
  method = ifelse(is.null(covariates), 'DIM', 'OLS')
  
  
  for(outcome in c('read', 'math', 'scie')){
    est_ols <- rep(NA, pv_num)
    var_ols <- rep(NA, pv_num)
    for(pv in 1:pv_num){
      if(is.null(covariates)){
        formula <- paste0(outcome, pv, ' ~ ', treat)
        } else{
          formula <- paste0(outcome, pv, ' ~ ', treat, ' + ', covariates)
        }

      lm_model <- survey::svyglm(formula = formula, design = dataset_survey)
      est_ols[pv] <- tidy(lm_model)[[2,2]]
      var_ols[pv] <- tidy(lm_model)[[2,3]]

    }
    est_out_ols <- mean(est_ols)
    var_samp_ols <- mean(var_ols)
    var_imp_ols <- sum((est_ols - est_out_ols)^2)/4
    se_out_ols <- sqrt(var_samp_ols + (1 + 1/5)*var_imp_ols)
    
    dim_list[[outcome]] <-
      data.frame(method = method,
                 sample = sample_label,
                 Outcome = outcome, 
                 Estimate = est_out_ols,
                 se = se_out_ols)
  }
  dim_list %>%
    bind_rows() %>%
    mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science')) %>%
    return()
}

moderator_p <- function(df, cat1, cat2){
  out_list <- list()
  for(outcome in unique(df$Outcome)){
    b1 <- with(df, Estimate[sample == cat1 & Outcome == outcome])
    se1 <- with(df, se[sample == cat1 & Outcome == outcome])
    b2 <- with(df, Estimate[sample == cat2 & Outcome == outcome])
    se2 <- with(df, se[sample == cat2 & Outcome == outcome]) 
    
    z <- (b1 - b2)/sqrt(se1^2 + se2^2)
    p <- round(2*(1 - pnorm(abs(as.numeric(z)))), 3)
    
    out_list[[outcome]] <- data.frame(Outcome = outcome, dif = b1-b2, z = z, p = p)
  }
  return(bind_rows(out_list))
}
  
  

rubin_means <- function(dataset, labels = c(0,1), pv_num = 5, 
                  treat = 'treat', weights = 'weight', cluster_id = 'school_id'){
  dataset <- as_survey_design(dataset, 
                              ids = cluster_id,
                              weights = weights)
  mean_list <- list()
  
  for(outcome in c('read', 'math', 'scie')){
    mean_treat0 <- rep(NA, pv_num)
    mean_treat1 <- rep(NA, pv_num)
    var_treat0 <- rep(NA, pv_num)
    var_treat1 <- rep(NA, pv_num)
    
    for(pv in 1:pv_num){
      
      summary_df <- dataset %>%
        group_by(treat) %>%
        summarize(mean = survey_mean(!!sym(paste0(outcome, pv))),
                  var = survey_var(!!sym(paste0(outcome, pv))))
      
      mean_treat0[pv] <- summary_df[[1,2]]
      mean_treat1[pv] <- summary_df[[2,2]]
      
      var_treat0[pv] <- summary_df[[1,4]]
      var_treat1[pv] <- summary_df[[2,4]]
      
    }
    mean_out0 <- mean(mean_treat0)
    var_samp0 <- mean(var_treat0)
    sd_out0 <- sqrt(var_samp0)
    
    mean_out1 <- mean(mean_treat1)
    var_samp1 <- mean(var_treat1)
    sd_out1 <- sqrt(var_samp1)
    
    mean_list[[outcome]] <-
      data.frame(Outcome = outcome, 
                 label = c(labels[1], labels[2]),
                 n = c(nrow(filter(dataset, treat==0)), nrow(filter(dataset, treat==1))),
                 Mean = c(mean_out0, mean_out1),
                 SD = c(sd_out0, sd_out1)
                 )
  }
  mean_list %>%
    bind_rows() %>%
    mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science')) %>%
    return()
}


stacked_bar <- function(data, x, fill){
  data %>%
    filter(!is.na(!!sym(x)) & !is.na(!!sym(fill))) %>%
    count(!!sym(x), !!sym(fill)) %>%
    ggplot(aes(x = !!sym(x), y = n, fill = !!sym(fill))) +
    geom_bar(position='fill', stat = 'identity') +
    theme(axis.text.x = element_text(angle = -30)) +
    labs(title = fill)
}

```

# (APPENDIX) Appendix {-} 

\newpage


# Full Regression Tables 

```{r functions-supp}

library(stargazer) 

rubin_lm <- function(dataset, covariates, pv_num = 5, 
                  treat = 'treat', weights = 'weight', cluster_var = 'school_id'){
  dataset_survey = as_survey_design(dataset, weights = weights, ids = cluster_var)

  lm_list <- list()
  se_list <- list()
  
  for(outcome in c('read', 'math', 'scie')){
    est_ols <- list()
    var_ols <- list()
    
    for(pv in 1:pv_num){
      formula <- paste0(outcome, pv, ' ~ ', treat, ' + ', covariates)
      lm_model <- survey::svyglm(formula = formula, design = dataset_survey)
      est_ols[[pv]] <- tidy(lm_model)$estimate
      var_ols[[pv]] <-  tidy(lm_model)$std.error
      names(est_ols[[pv]]) <- tidy(lm_model)$term
      names(var_ols[[pv]]) <- tidy(lm_model)$term
      #var_ols[pv] <- vcovHC(lm_model, type="HC1")[2,2]
    }
    est_out_ols <- rowMeans(simplify2array(est_ols))
    var_samp_ols <- rowMeans(simplify2array(var_ols))
    var_imp_ols <- lapply(est_ols, function(x){(x - est_out_ols)^2}) %>%
      simplify2array() %>%
      rowSums()/4
    se_out_ols <- sqrt(var_samp_ols + (1 + 1/5)*var_imp_ols)
    
    lm_final <- lm(reformulate(c('treat', covariates), paste0(outcome,1)), 
                   data = dataset)
    lm_final$coefficients <- est_out_ols
    
    lm_list[[outcome]] <- lm_final
    se_list[[outcome]] <- se_out_ols
  }
    return(list(estimates = lm_list, se_cluster = se_list))
}

```

```{r models-mex, results = 'asis'}
ols_mex_pre <- rubin_lm(pisa_mex, 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + migrant_region')


ols_mex_post <- rubin_lm(pisa_mex, 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + migrant_region + non_urban +
                wealth + escs + parent_isei + family_structure')

stargazer(list(ols_mex_pre$estimates, ols_mex_post$estimates),
          covariate.labels =
            c('Child of return migrants',
            'Mother\'s education',
            'Father\'s education',
            'Female',
            'Early education: 1+',
            'Cultural possessions',
            'Home educational envir.',
            'Age',
            'Year: 2012',
            'Migrant region',
            'Urban: non-urban',
            'Urban: missing',
            'Wealth',
            'ESCS',
            'Parent ISEI',
            'Family: single-parent',
            'Family: no parents',
            'Family: missing',
            '(Intercept)'),
          header = F,
          se = c(ols_mex_pre$se_cluster, ols_mex_post$se_cluster),
          keep.stat = c('n'),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c('†', '*', '**', '***'),
          no.space = T,
          dep.var.labels.include = F,
          column.labels = rep(c('Reading', 'Math', 'Science'), 2),
            notes = c("\\parbox[t]{\\textwidth}{\\textit{Note}: †\\textit{p}<0.1; *\\textit{p}<0.05; **\\textit{p}<0.01; ***\\textit{p}<0.001. OLS estimates comparing U.S.-born children of return migrants in Mexico to children in Mexico. All models cluster standard errors at the school level and incorporate sampling weights. ESCS stands for \"economic, social, and cultural status.\" Reference groups: year = 2009, early education = 1 year or less, urban = urban, family structure = two-parent family.}", 
                    "\\textit{Source}: PISA data from 2009 and 2012. Author's calculations."), 
          notes.label = '',
          notes.align = 'l',
          notes.append = F,
          font.size = 'footnotesize',
          table.placement = "H",
          label = 'tab:models-mex',
          title = 'Full table of OLS coefficients for Mexican sample')

```


```{r models-us, results = 'asis'}
ols_us_pre <- rubin_lm(pisa_mex_us, 'mom_ed + dad_ed + female + early_ed +
                       cultural_pos + home_ed + age + year')


ols_us_post <- rubin_lm(pisa_mex_us, 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + 
                wealth + escs + parent_isei + family_structure')

stargazer(list(ols_us_pre$estimates, ols_us_post$estimates),
          covariate.labels = 
            c('Child of return migrants',
              'Mother\'s education',
              'Father\'s education',
              'Female',
              'Early education: 1+',
              'Cultural possessions',
              'Home educational envir.',
              'Age',
              'Year: 2012',
              'Urban: non-urban',
              'Urban: missing',
              'Wealth',
              'ESCS',
              'Parent ISEI',
              'Family: single-parent',
              'Family: no parents',
              'Family: missing',
              '(Intercept)'),
          header = F,
          se = c(ols_us_pre$se_cluster, ols_us_post$se_cluster),
          keep.stat = c('n'),
          star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
          star.char = c('†', '*', '**', '***'),
          no.space = T,
          dep.var.labels.include = F,
          column.labels = rep(c('Reading', 'Math', 'Science'), 2),
            notes = c("\\parbox[t]{\\textwidth}{\\textit{Note}: †\\textit{p}<0.1; *\\textit{p}<0.05; **\\textit{p}<0.01; ***\\textit{p}<0.001. OLS estimates comparing U.S.-born children of return migrants in Mexico to children of Spanish-speaking immigrants in the U.S. All models cluster standard errors at the school level and incorporate sampling weights. Reference groups: year = 2009, early education = 1 year or less, urban = urban, family structure = two-parent family.}", 
                    "\\textit{Source}: PISA data from 2009 and 2012. Author's calculations."), 
          notes.label = '',
          notes.align = 'l',
          notes.append = F,
          font.size = 'small',
          table.placement = "H",
          label = 'tab:models-us',
          title = 'Full table of OLS coefficients for U.S. sample')

```


\newpage

# Sensitivity to Unobserved Confounders

Important variables might be correlated with both immigration (and return migration) as well as test scores. For the Mexican comparison, results are already close to 0, so this section focuses on the U.S. comparison. How strong would an unobserved confounder need to be in order to create a null effect for migration to Mexico for U.S.-born children of return migrants? I use the omitted variable bias (OVB) analysis tools of the `sensemakr` package Cinelli & Hazlett (2020) to help answer this question.  



```{r sens, results = 'asis', fig.cap = 'Contour plot of possible confounders of reading scores for the U.S. comparison. Contour lines represent t-values for the return migration coefficient in an OLS model for reading scores with eight additional covariates and hypothetical levels of confounding. "Unadjusted" shows the t-value of the immigration coefficient with no confounding. "10x parents\' ed." shows the t-value of the immigration coefficient after accounting for a hypothetical confounder ten times as strong as the join effects of parents\' education.'}

sens_out <- sensemakr(model = lm('read1 ~ treat + mom_ed + dad_ed + female + early_ed +
                                 cultural_pos + home_ed + age + year', data = pisa_mex_us),
                         data = pisa_mex_us,
                         treatment = "treat",
                         benchmark_covariates = list("parents' ed." = c('mom_ed', 'dad_ed')),
                         kd = 10)
plot(sens_out, sensitivity.of = 't-value')
```

```{r sens-tab, results = 'asis', eval = F}
ovb_minimal_reporting(sens_out, format = "latex", label = 'tab:sens-tab',
                      caption = "Omitted Variable Bias Minimal Reporting Table")
```


I find that an unobserved confounder would need to explain more than `r round(sens_out$sensitivity_stats$rv_q * 100, 1)` percent of the residual variance of both return migration and reading scores in order to bring the estimate of immigration down to 0. Figure \@ref(fig:sens) shows that a confounder even ten times as strong as the joint effect of mother's and father's education would still not produce a nonsignificant coefficient. The t-value in the original model is `r round(sens_out$sensitivity_stats$t_statistic, 1)`. This plot shows that a confounder even five times as strong as mother's education (`mom_ed`) would only reduce this t-value to `r round(sens_out$bounds$adjusted_t, 1)`, still significant at the $\alpha = 5$ percent level. Results are similar for science and math scores. 


\newpage



# Results over time, 2009-2018 

The 2009 and 2012 waves of PISA contain 4 plausible values, while the 2015 and 2018 data contain 10. In pooled analysis of these different test years, I follow Jerrim (2017) and Teltemann & Schunck (2020) in using five plausible values for all tests.

Since the 2015 and 2018 data do not have family structure or Mexican region, I do not control for these variables in this section.


```{r years-sample}
bind_rows(
  count(pisa_mex_all, year, treat) %>%
    mutate(treat = ifelse(treat == 1, 'U.S.-born children of return migrants in Mexico', 'Mexican comparison sample')),
  count(filter(pisa_mex_us_all, treat == 0), year) %>%
    mutate(treat = 'U.S. comparison sample')) %>%
  pivot_wider(names_from = 'treat', values_from = 'n') %>%
  select(Year = year, 'U.S.-born children of return migrants in Mexico', everything()) %>%
  kable(booktabs = T, caption = 'Samples sizes for PISA data in 2009, 2012, 2015, and 2018') %>%
  kable_styling(latex_options = "HOLD_position") %>%
  column_spec(2:4, width = "4cm")
```


\newpage

## Pooled 2009, 2012, 2015, and 2018

```{r pooled, results = 'hide', fig.height = 5.5, fig.cap = "Pooled 2009, 2012, 2015, and 2018. Error bars represent 95% asymptotic confidence intervals. All models cluster standard errors at the school level and incorporate sampling weights."}


dim_fig_mex_us <-
  bind_rows(rubin(pisa_mex_all, 'DIM'),
          rubin(pisa_mex_all, 'OLS (pre-migration)', 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year'),
          rubin(pisa_mex_all, 'OLS (pre- & post-migration)',
                covariates = 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + non_urban +
                wealth + escs + parent_isei')
) %>%
  mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science'),
            sample = factor(sample, 
                            levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)')))


dim_fig_us <-
  bind_rows(rubin(pisa_mex_us_all, 'DIM', weights = 'senate_weight'),
          rubin(pisa_mex_us_all, 'OLS (pre-migration)', 
                'mom_ed + dad_ed + female + early_ed +cultural_pos + home_ed + age + year',
                weights = 'senate_weight'),
          rubin(pisa_mex_us_all, 'OLS (pre- & post-migration)',
                covariates = 'mom_ed + dad_ed + female + early_ed +
                cultural_pos + home_ed + age + year + 
                wealth + escs + parent_isei', 
                weights = 'senate_weight')
          ) %>%
  mutate(Outcome = recode_factor(Outcome,
                                 'read' = 'Reading',
                                 'math' = 'Math',
                                 'scie' = 'Science'),
         sample = factor(sample, 
                            levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)'))) 



(dim_fig_mex_us %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Estimated difference in PISA score', title = 'Mexico comparison') +
  theme(#axis.text.x=element_text(angle=-15),
        legend.justification=c(1,0), 
        legend.position='none')) /
  (dim_fig_us %>%
  ggplot(aes(x = sample, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Estimated difference in PISA score', title = 'U.S. comparison') +
  theme(#axis.text.x=element_text(angle=-15),
        legend.position = 'bottom'))

```

\newpage

## Separate results by survey year

```{r years, fig.height = 6, fig.cap = "Separate results by survey year. Error bars represent 95% asymptotic confidence intervals. All models cluster standard errors at the school level and incorporate sampling weights."}
mex_list <- list()

for(year_loop in c('2009', '2012', '2015', '2018')){
  pv_num_loop <- ifelse(year_loop %in% c('2009', '2012'), 5, 10)
  
  mex_list[[year_loop]] <-
    bind_rows(rubin(filter(pisa_mex_all, year == year_loop), 'DIM', pv_num = pv_num_loop),
            rubin(filter(pisa_mex_all, year == year_loop), 'OLS (pre-migration)', 'mom_ed + dad_ed + female + early_ed +
                  cultural_pos + home_ed + age', pv_num = pv_num_loop),
            rubin(filter(pisa_mex_all, year == year_loop), 'OLS (pre- & post-migration)',
                  covariates = 'mom_ed + dad_ed + female + early_ed +
                  cultural_pos + home_ed + age + non_urban +
                  wealth + escs + parent_isei', pv_num = pv_num_loop)
  ) %>%
    mutate(Outcome = recode_factor(Outcome,
                                   'read' = 'Reading',
                                   'math' = 'Math',
                                   'scie' = 'Science'),
              sample = factor(sample, 
                              levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)')),
           year = year_loop)
}


us_list <- list()

for(year_loop in c('2009', '2012', '2015', '2018')){
  pv_num_loop <- ifelse(year_loop %in% c('2009', '2012'), 5, 10)
  
  us_list[[year_loop]] <-
    bind_rows(rubin(filter(pisa_mex_us_all, year == year_loop), 'DIM', weights = 'senate_weight', pv_num = pv_num_loop),
            rubin(filter(pisa_mex_us_all, year == year_loop), 'OLS (pre-migration)', 'mom_ed + 
            dad_ed + female + early_ed + cultural_pos + home_ed + age',
            weights = 'senate_weight', pv_num = pv_num_loop),
            rubin(filter(pisa_mex_us_all, year == year_loop), 'OLS (pre- & post-migration)',
                  covariates = 'mom_ed + dad_ed + female + early_ed +
                  cultural_pos + home_ed + age + non_urban +
                  wealth + escs + parent_isei', weights = 'senate_weight', pv_num = pv_num_loop)
  ) %>%
    mutate(Outcome = recode_factor(Outcome,
                                   'read' = 'Reading',
                                   'math' = 'Math',
                                   'scie' = 'Science'),
              sample = factor(sample, 
                              levels = c('DIM', 'OLS (pre-migration)', 'OLS (pre- & post-migration)')),
           year = year_loop)
}

bind_rows(
  mutate(bind_rows(mex_list), comparison = 'Mexico'),
  mutate(bind_rows(us_list), comparison = 'USA')) %>%
  ggplot(aes(x = year, y = Estimate, color = Outcome, shape = Outcome)) +
  geom_point(position = position_dodge(0.2)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*se, ymax = Estimate + 1.96*se),
                width = 0, alpha = 1, position = position_dodge(0.2)) +
  geom_hline(yintercept = 0) +
  labs(x = '', y = 'Estimated difference in PISA score') +
  facet_wrap(~comparison + sample, scales = 'free_x') +
  theme(#axis.text.x=element_text(angle=-15),
    legend.position = 'bottom')    
    #legend.position=c(.8,.2))
```





